RLOOP_PEAKS <- list("RL by counts" = "analyses/RLoop_Table_Analysis/data/rloops/strategy.a__10bp__peaks.narrowPeak",
                    "RL by mean signal" = "analyses/RLoop_Table_Analysis/data/rloops/strategy.b__10bp__peaks.narrowPeak",
                    "RL by mean qval" = "analyses/RLoop_Table_Analysis/data/rloops/strategy.c__10bp__peaks.narrowPeak")
MAX_RL_SIZE <- 5000

library(magrittr)
library(tidyverse)
library(ChIPpeakAnno)
library(enrichR)
library(EnsDb.Hsapiens.v86)
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
library(ChIPseeker)
# Annotations
annoData <- toGRanges(EnsDb.Hsapiens.v86, feature = "gene")
# TSS region
tss <- getPromoters(TxDb=TxDb.Hsapiens.UCSC.hg38.knownGene, 
                    upstream=3000, 
                    downstream=3000)
# Annotation function
annoRL <- function(gr, max_rl_size) {
  gr <- gr[width(gr) < max_rl_size,]
  gr <- keepStandardChromosomes(gr, pruning.mode = "coarse")
  anno <- annotatePeakInBatch(myPeakList = gr, AnnotationData = annoData, 
                      output = "overlapping")
  anno <- anno[! is.na(anno$feature),]
  annodf <- as.data.frame(anno)
  mapping <- AnnotationDbi::select(EnsDb.Hsapiens.v86, 
                                   keys = keys(EnsDb.Hsapiens.v86), 
                                   columns = "SYMBOL")
  annodf <- left_join(annodf, mapping , by = c('feature' = "GENEID"))
  anno$SYMBOL <- annodf$SYMBOL 
  return(anno)
}

# Read RLoops
readRL <- function(x, max_rl_size) {
  readr::read_tsv(x, comment = "#", col_names = c(
    "seqnames", "start", "end", "name", "score", "strand", "signalValue", "pval", "qval", "peak"
  )) %>%
    dplyr::filter(end - start < max_rl_size) %>%
    as.data.frame() %>%
    toGRanges() 
}

Overview

We evaluated the output from the various strategies used to generate a set of standardized Rloops.

Results

rloops <- lapply(RLOOP_PEAKS, readRL, max_rl_size=MAX_RL_SIZE)
tagMatrixList <- lapply(
  rloops, function(peaks) {
    getTagMatrix(peaks, weightCol = "score", windows = tss)
  }
)
peakAnnoList <- lapply(
  rloops, function(peaks) {
    annotatePeak(peaks, verbose = FALSE, TxDb = TxDb.Hsapiens.UCSC.hg38.knownGene)
  }
)
geneLst <- lapply(rloops, function(gr) {
  anno <- annoRL(gr, max_rl_size = MAX_RL_SIZE) %>% as.data.frame()
  return(anno)
})
plotAvgProf(tagMatrixList, xlim=c(-3000, 3000))

plotDistToTSS(peakAnnoList)

plotAnnoBar(peakAnnoList)

Gene-level analysis

enrichLinks <- plyr::llply(names(geneLst), function(groupNow) {
  genePeaks <- geneLst[[groupNow]]
  genesNow <- genePeaks %>%
    dplyr::filter(! is.na(SYMBOL)) %>%
    pull(SYMBOL) %>%
    unique()
  if (length(genesNow) > 5000) {
    genesNow <- sample(genesNow, 5000)
  }
  response <- httr::POST(url = 'https://maayanlab.cloud/Enrichr/addList', body = list(
    'list' = paste0(genesNow, collapse = "\n"),
    'description' = groupNow
  ))
  jsonlite::fromJSON(httr::content(response, as = "text"))
})
names(enrichLinks) <- names(geneLst)

We then performed pathway enrichment on the genes overlapping with DRLA peaks and found that the pathways with increasing R-loops in E7107 treatment were largely related to spliceosome.

res <- lapply(names(geneLst), function(groupNow) {
  genesNow <- geneLst[[groupNow]]
  if (length(genesNow) > 5000) {
    genesNow <- sample(genesNow, 5000)
  }
  permalinkNow <- enrichLinks[[groupNow]]$shortId
  knitr::knit_child(
    'pathEnrich_template.Rmd', envir = environment(), quiet = TRUE
  )
})
cat(unlist(res), sep = '\n')

RL by counts

permalink <- paste0("https://maayanlab.cloud/Enrichr/enrich?dataset=", permalinkNow[1])

Pathway enrichment was calculated for RL by counts using enrichr. The permalink to this analysis is available here.

Here are some selected pathways which represents these results:

dbsNow <- c("KEGG_2021_Human", "GO_Biological_Process_2021",
            "ChEA_2016", "BioPlanet_2019")
maxval <- 2000
log <- capture.output(enres <- enrichr(genesNow, databases = dbsNow))
print(enres)
## $KEGG_2021_Human
##                X.
## 1 "expired": true
## 2               }
## 
## $GO_Biological_Process_2021
##                X.
## 1 "expired": true
## 2               }
## 
## $ChEA_2016
##                X.
## 1 "expired": true
## 2               }
## 
## $BioPlanet_2019
##                X.
## 1 "expired": true
## 2               }
# plotLst <- lapply(names(enres), function(enrNow) {
#   enrNowData <- enres[[enrNow]]
#   enrNowData %>%
#     slice_max(order_by = Combined.Score, n = 10) %>%
#     # dplyr::mutate(Combined.Score = case_when(Combined.Score > maxval ~ maxval,
#     #                                          TRUE ~ Combined.Score)) %>%
#     dplyr::mutate(logP = -log10(Adjusted.P.value)) %>%
#     dplyr::mutate(Term = substr(Term, 1, 50)) %>%
#     dplyr::mutate(Term = stringr::str_wrap(Term,  width = 40)) %>%
#     arrange(Combined.Score) %>%
#     dplyr::mutate(Term = factor(Term, levels = unique(Term))) %>%
#     ggplot(aes(x = Term, y = Combined.Score, fill = logP)) +
#     geom_col() +
#     ggpubr::rotate() +
#     xlab(NULL) +
#     ylab(paste0("Combined Score (max ", maxval, ")")) +
#     scale_y_continuous(expand = c(0, 1)) +
#     labs(title = enrNow, subtitle = paste0("DRLA: ", groupNow)) +
#     theme_classic(base_size = 11)
# })
# ggpubr::ggarrange(plotlist = plotLst, align = "v")

Here are the genes which were found in the overlap group:

tibble(
  "Genes" = genesNow
) %>%
  DT::datatable(extensions = 'Buttons', 
                options = list(dom = 'Bfrtip',
                               buttons = list(list(extend='copy'),
                                              list(extend='csv', filename = groupNow),
                                              list(extend = 'excel', filename = groupNow))))

RL by mean signal

permalink <- paste0("https://maayanlab.cloud/Enrichr/enrich?dataset=", permalinkNow[1])

Pathway enrichment was calculated for RL by mean signal using enrichr. The permalink to this analysis is available here.

Here are some selected pathways which represents these results:

dbsNow <- c("KEGG_2021_Human", "GO_Biological_Process_2021",
            "ChEA_2016", "BioPlanet_2019")
maxval <- 2000
log <- capture.output(enres <- enrichr(genesNow, databases = dbsNow))
print(enres)
## $KEGG_2021_Human
## [1] Term                 Overlap              P.value             
## [4] Adjusted.P.value     Old.P.value          Old.Adjusted.P.value
## [7] Odds.Ratio           Combined.Score       Genes               
## <0 rows> (or 0-length row.names)
## 
## $GO_Biological_Process_2021
## [1] Term                 Overlap              P.value             
## [4] Adjusted.P.value     Old.P.value          Old.Adjusted.P.value
## [7] Odds.Ratio           Combined.Score       Genes               
## <0 rows> (or 0-length row.names)
## 
## $ChEA_2016
## [1] Term                 Overlap              P.value             
## [4] Adjusted.P.value     Old.P.value          Old.Adjusted.P.value
## [7] Odds.Ratio           Combined.Score       Genes               
## <0 rows> (or 0-length row.names)
## 
## $BioPlanet_2019
## [1] Term                 Overlap              P.value             
## [4] Adjusted.P.value     Old.P.value          Old.Adjusted.P.value
## [7] Odds.Ratio           Combined.Score       Genes               
## <0 rows> (or 0-length row.names)
# plotLst <- lapply(names(enres), function(enrNow) {
#   enrNowData <- enres[[enrNow]]
#   enrNowData %>%
#     slice_max(order_by = Combined.Score, n = 10) %>%
#     # dplyr::mutate(Combined.Score = case_when(Combined.Score > maxval ~ maxval,
#     #                                          TRUE ~ Combined.Score)) %>%
#     dplyr::mutate(logP = -log10(Adjusted.P.value)) %>%
#     dplyr::mutate(Term = substr(Term, 1, 50)) %>%
#     dplyr::mutate(Term = stringr::str_wrap(Term,  width = 40)) %>%
#     arrange(Combined.Score) %>%
#     dplyr::mutate(Term = factor(Term, levels = unique(Term))) %>%
#     ggplot(aes(x = Term, y = Combined.Score, fill = logP)) +
#     geom_col() +
#     ggpubr::rotate() +
#     xlab(NULL) +
#     ylab(paste0("Combined Score (max ", maxval, ")")) +
#     scale_y_continuous(expand = c(0, 1)) +
#     labs(title = enrNow, subtitle = paste0("DRLA: ", groupNow)) +
#     theme_classic(base_size = 11)
# })
# ggpubr::ggarrange(plotlist = plotLst, align = "v")

Here are the genes which were found in the overlap group:

tibble(
  "Genes" = genesNow
) %>%
  DT::datatable(extensions = 'Buttons', 
                options = list(dom = 'Bfrtip',
                               buttons = list(list(extend='copy'),
                                              list(extend='csv', filename = groupNow),
                                              list(extend = 'excel', filename = groupNow))))

RL by mean qval

permalink <- paste0("https://maayanlab.cloud/Enrichr/enrich?dataset=", permalinkNow[1])

Pathway enrichment was calculated for RL by mean qval using enrichr. The permalink to this analysis is available here.

Here are some selected pathways which represents these results:

dbsNow <- c("KEGG_2021_Human", "GO_Biological_Process_2021",
            "ChEA_2016", "BioPlanet_2019")
maxval <- 2000
log <- capture.output(enres <- enrichr(genesNow, databases = dbsNow))
print(enres)
## $KEGG_2021_Human
## [1] Term                 Overlap              P.value             
## [4] Adjusted.P.value     Old.P.value          Old.Adjusted.P.value
## [7] Odds.Ratio           Combined.Score       Genes               
## <0 rows> (or 0-length row.names)
## 
## $GO_Biological_Process_2021
## [1] Term                 Overlap              P.value             
## [4] Adjusted.P.value     Old.P.value          Old.Adjusted.P.value
## [7] Odds.Ratio           Combined.Score       Genes               
## <0 rows> (or 0-length row.names)
## 
## $ChEA_2016
## [1] Term                 Overlap              P.value             
## [4] Adjusted.P.value     Old.P.value          Old.Adjusted.P.value
## [7] Odds.Ratio           Combined.Score       Genes               
## <0 rows> (or 0-length row.names)
## 
## $BioPlanet_2019
## [1] Term                 Overlap              P.value             
## [4] Adjusted.P.value     Old.P.value          Old.Adjusted.P.value
## [7] Odds.Ratio           Combined.Score       Genes               
## <0 rows> (or 0-length row.names)
# plotLst <- lapply(names(enres), function(enrNow) {
#   enrNowData <- enres[[enrNow]]
#   enrNowData %>%
#     slice_max(order_by = Combined.Score, n = 10) %>%
#     # dplyr::mutate(Combined.Score = case_when(Combined.Score > maxval ~ maxval,
#     #                                          TRUE ~ Combined.Score)) %>%
#     dplyr::mutate(logP = -log10(Adjusted.P.value)) %>%
#     dplyr::mutate(Term = substr(Term, 1, 50)) %>%
#     dplyr::mutate(Term = stringr::str_wrap(Term,  width = 40)) %>%
#     arrange(Combined.Score) %>%
#     dplyr::mutate(Term = factor(Term, levels = unique(Term))) %>%
#     ggplot(aes(x = Term, y = Combined.Score, fill = logP)) +
#     geom_col() +
#     ggpubr::rotate() +
#     xlab(NULL) +
#     ylab(paste0("Combined Score (max ", maxval, ")")) +
#     scale_y_continuous(expand = c(0, 1)) +
#     labs(title = enrNow, subtitle = paste0("DRLA: ", groupNow)) +
#     theme_classic(base_size = 11)
# })
# ggpubr::ggarrange(plotlist = plotLst, align = "v")

Here are the genes which were found in the overlap group:

tibble(
  "Genes" = genesNow
) %>%
  DT::datatable(extensions = 'Buttons', 
                options = list(dom = 'Bfrtip',
                               buttons = list(list(extend='copy'),
                                              list(extend='csv', filename = groupNow),
                                              list(extend = 'excel', filename = groupNow))))